********************************************************************************
* Objective: Create Manufacturing share of exports data
********************************************************************************

********************************************************************************
**# Step 1: Load and format Manufactures exports (% of merchandise exports) data
/*******************************************************************************
Source: World Bank WDI
Notes: We take 0s as missing values
       Data accessed 01.07.2020
*******************************************************************************/

* Purpose: read the WDI "Manufactures exports (% of merchandise exports)"
* sheet, convert the yearly columns to numeric, reshape to one row per
* country-year, keep the countries listed in processed_datasets/ccode and
* save the result.

clear all
import excel "raw_datasets/mfg_sh_x.xlsx", sheet("Data") firstrow clear

* Dropping 2020
drop y2020

* Generating missing values: ".." entries and zeros are turned into missing
* values (see the header notes), then the yearly columns are made numeric.
quietly {
	foreach var of varlist y1960-y2019 {
		* A column that import excel stored as numeric cannot be compared
		* with ".." (type mismatch), so branch on the storage type.
		capture confirm string variable `var'
		if _rc == 0 {
			replace `var' = "" if `var' == ".." | `var' == "0"
		}
		else {
			replace `var' = . if `var' == 0
		}
	}
	* Columns that are already numeric are left untouched by destring.
	destring y*, replace
}

* Matching variable name of country code with the one used in the main dataset
rename CountryCode code_wb

* The 2019 column is relabelled as 2020, so after the reshape there is no
* 2019 row and the 2020 row carries the 2019 values.
rename y2019 y2020

* Reshaping into long shape and renaming variable
reshape long y, i(code_wb) j(year)
rename y mfg_sh_x

* Keeping only variables of interest
keep code_wb year mfg_sh_x

* Keeping only desired countries (those matched in the ccode file)
merge m:1 code_wb using "processed_datasets/ccode", keepusing(country_wb)
keep if _merge == 3
drop _merge country_wb

* Saving
save "processed_datasets/dataset_mfg", replace
	
********************************************************************************
**# Step 2.1: Missing Data - Approximating 2020
********************************************************************************

*-------------------------------------------------------------------------------
* There are no observations for 1960 and 1961, so we take 1962 as 1960

* Discard the 1960 and 1961 rows and relabel the 1962 rows as 1960.
drop if inlist(year, 1960, 1961)
replace year = 1960 if year == 1962

* Fill a missing 2020 value with the most recent available value among
* 2018, 2017 and 2016 (tried in that order). The donor year is flagged and
* set to missing afterwards, so the value is moved rather than duplicated.
* Each country block is assumed to hold 58 rows at this point, with rows
* 55, 56, 57 and 58 being 2016, 2017, 2018 and 2020.

sort code_wb year
gen moved_to_2020 = 0
forvalues lag = 1/3 {
	local donor_year = 2019 - `lag'
	local donor_row  = 58 - `lag'
	* Flag the donor first, while the 2020 row is still empty ...
	by code_wb: replace moved_to_2020 = 1 if mfg_sh_x != . & mfg_sh_x[58] == . & year == `donor_year'
	* ... then copy the donor value into the 2020 row.
	by code_wb: replace mfg_sh_x = mfg_sh_x[`donor_row'] if mfg_sh_x == . & year == 2020
}
replace mfg_sh_x = . if moved_to_2020 == 1
drop moved_to_2020

********************************************************************************
**# Step 2.2: Missing Data - Incorporating GJV2016 Data
********************************************************************************

* Align names with the GJV2016 file before merging: the series built so far
* becomes mfg_x (the using file contributes its own mfg_sh_x) and the country
* key becomes ccode. "COD" is recoded to "ZAR" for the merge and restored
* afterwards.
rename (mfg_sh_x code_wb) (mfg_x ccode)
replace ccode = "ZAR" if ccode == "COD"

* Merging; the _merge indicator is not kept, so unmatched rows from either
* side remain in the data.
* NOTE(review): later steps address rows by position within country, which
* presumes the using file adds no extra rows for these countries - confirm.
merge 1:1 ccode year using "raw_datasets/exports_gjv", keepusing(mfg_sh_x) nogenerate

* Restore the original country key
replace ccode = "COD" if ccode == "ZAR"
rename ccode code_wb

* Drop South Sudan as it doesn't have values
drop if code_wb == "SSD"

********************************************************************************
**# Step 2.3: Missing Data - Using linear interpolation
********************************************************************************

* Linearly interpolate mfg_x over year within each country, then use the
* interpolated values to fill gaps only for the countries listed below.
sort code_wb year
by code_wb: ipolate mfg_x year, gen(intmfg)

* List of countries replaced with interpolation
local country_list_int "UGA TZA SLB OMN NER LBN KEN JAM IRN HND ETH DOM CAF BOL BHS BHR YEM SLE PAN NPL MOZ LSO LKA DZA CMR AFG MAC IRQ GIN AGO MLI LBY BGD"

* Build a 0/1 marker for the listed countries, then fill all gaps in one pass
tempvar fill_ok
gen byte `fill_ok' = 0
foreach iso of local country_list_int {
	replace `fill_ok' = 1 if code_wb == "`iso'"
}
replace mfg_x = intmfg if mfg_x == . & `fill_ok' == 1

drop intmfg `fill_ok'

********************************************************************************
**# Step 2.4: Missing Data - Case-by-case
********************************************************************************

* Manual overrides of mfg_x for individual countries, taken from the sources
* named in each header. The dataset has no 2019 row (the 2019 column was
* relabelled 2020 in Step 1), so "We take 2019 as 2020" means the source's
* 2019 figure is written to the 2020 row.

* Bangladesh - S: WTO (https://data.wto.org/) - data is estimated - We take 2019 as 2020

replace mfg_x = 96.4384676 if year == 2020 & code_wb == "BGD"
replace mfg_x = 96.537756 if year == 2018 & code_wb == "BGD"
replace mfg_x = 96.0754233 if year == 2017 & code_wb == "BGD"
replace mfg_x = 94.2597581 if year == 2016 & code_wb == "BGD"

* Gabon - S: WTO (https://data.wto.org/) - data is estimated - We take 2019 as 2020

replace mfg_x = 4.283029001 if year == 2020 & code_wb == "GAB"
replace mfg_x = 4.276955286 if year == 2018 & code_wb == "GAB"
replace mfg_x = 3.794772507 if year == 2017 & code_wb == "GAB"
replace mfg_x = 4.902186421 if year == 2016 & code_wb == "GAB"
replace mfg_x = 2.992372384 if year == 2015 & code_wb == "GAB"
replace mfg_x = 2.159227986 if year == 2014 & code_wb == "GAB"
replace mfg_x = 1.643589999 if year == 2013 & code_wb == "GAB"
replace mfg_x = 1.738122827 if year == 2012 & code_wb == "GAB"
replace mfg_x = 1.925046078 if year == 2011 & code_wb == "GAB"

* Haiti - S: WTO (https://data.wto.org/) - data is estimated - We take 2019 as 2020

replace mfg_x = 88.91666667 if year == 2020 & code_wb == "HTI"
replace mfg_x = 91.83673469 if year == 2018 & code_wb == "HTI"
replace mfg_x = 90.81632653 if year == 2017 & code_wb == "HTI"
replace mfg_x = 91.68356998 if year == 2016 & code_wb == "HTI"
replace mfg_x = 96.93877551 if year == 2015 & code_wb == "HTI"
replace mfg_x = 95.57894737 if year == 2014 & code_wb == "HTI"
replace mfg_x = 95.70621469 if year == 2013 & code_wb == "HTI"
replace mfg_x = 94.72392638 if year == 2012 & code_wb == "HTI"
replace mfg_x = 94.52411995 if year == 2011 & code_wb == "HTI"

* Trinidad and Tobago - S: WTO (https://data.wto.org/) - data is estimated - We take 2019 as 2020

replace mfg_x = 41.279 if year == 2020 & code_wb == "TTO"
replace mfg_x = 41.243 if year == 2018 & code_wb == "TTO"
replace mfg_x = 45.572 if year == 2017 & code_wb == "TTO"
replace mfg_x = 47.216 if year == 2016 & code_wb == "TTO"

* Taiwan - S: ITC Trade Map - We take 2019 as 2020

replace mfg_x = 93.7 if year == 2020 & code_wb == "TWN"
replace mfg_x = 93.6 if year == 2018 & code_wb == "TWN"
replace mfg_x = 94.6 if year == 2017 & code_wb == "TWN"
replace mfg_x = 93.7 if year == 2016 & code_wb == "TWN"
replace mfg_x = 91.5 if year == 2015 & code_wb == "TWN"
replace mfg_x = 90.3 if year == 2014 & code_wb == "TWN"
replace mfg_x = 90.7 if year == 2013 & code_wb == "TWN"

* DRC - S: ITC Trade Map (Mirror Data) - We take 2019 as 2020 and 2014 as 2013
* (the 2013 and 2014 rows receive the same value)

replace mfg_x = 1.66 if year == 2020 & code_wb == "COD"
replace mfg_x = 2.23 if year == 2018 & code_wb == "COD"
replace mfg_x = 10.32 if year == 2017 & code_wb == "COD"
replace mfg_x = 5.74 if year == 2016 & code_wb == "COD"
replace mfg_x = 5.86 if year == 2015 & code_wb == "COD"
replace mfg_x = 6.29 if year == 2014 & code_wb == "COD"
replace mfg_x = 6.29 if year == 2013 & code_wb == "COD"

* Equatorial Guinea - S: Official Statistics - We take 2017 as 2020 and 2018
* Notes: We take "Others" as manufactures, it's almost insignificant

replace mfg_x = 1.4 if year == 2020 & code_wb == "GNQ"
replace mfg_x = 1.4 if year == 2018 & code_wb == "GNQ"
replace mfg_x = 1.4 if year == 2017 & code_wb == "GNQ"
replace mfg_x = 1.5 if year == 2016 & code_wb == "GNQ"
replace mfg_x = 1.1 if year == 2015 & code_wb == "GNQ"
replace mfg_x = 0.5 if year == 2014 & code_wb == "GNQ"
replace mfg_x = 0.7 if year == 2013 & code_wb == "GNQ"

* Papua New Guinea - S: ITC Trade Map (Mirror Data) - We take 2019 as 2020

replace mfg_x = 5.17 if year == 2020 & code_wb == "PNG"
replace mfg_x = 5.44 if year == 2018 & code_wb == "PNG"
replace mfg_x = 4.08 if year == 2017 & code_wb == "PNG"
replace mfg_x = 2.92 if year == 2016 & code_wb == "PNG"
replace mfg_x = 2.82 if year == 2015 & code_wb == "PNG"
replace mfg_x = 4.34 if year == 2014 & code_wb == "PNG"
replace mfg_x = 3.62 if year == 2013 & code_wb == "PNG"

* Venezuela - S: ITC Trade Map (Mirror Data) - We take 2019 as 2020

replace mfg_x = 5.3 if year == 2020 & code_wb == "VEN"
replace mfg_x = 3.8 if year == 2018 & code_wb == "VEN"
replace mfg_x = 5.7 if year == 2017 & code_wb == "VEN"
replace mfg_x = 5 if year == 2016 & code_wb == "VEN"
replace mfg_x = 4.8 if year == 2015 & code_wb == "VEN"
replace mfg_x = 3.7 if year == 2014 & code_wb == "VEN"

* Bhutan - S: ITC Trade Map (Mirror Data) - We take 2019 as 2020 and 2014 as 2013

replace mfg_x = 66.8 if year == 2020 & code_wb == "BTN"
replace mfg_x = 66.8 if year == 2018 & code_wb == "BTN"
replace mfg_x = 68.7 if year == 2017 & code_wb == "BTN"
replace mfg_x = 84.9 if year == 2016 & code_wb == "BTN"
replace mfg_x = 60.7 if year == 2015 & code_wb == "BTN"
replace mfg_x = 83.7 if year == 2014 & code_wb == "BTN"
replace mfg_x = 83.7 if year == 2013 & code_wb == "BTN"

* Chad - S: ITC Trade Map (Mirror Data) - We take 2019 as 2020

replace mfg_x = 0.2 if year == 2020 & code_wb == "TCD"
replace mfg_x = 0.3 if year == 2018 & code_wb == "TCD"
replace mfg_x = 0.5 if year == 2017 & code_wb == "TCD"
replace mfg_x = 0.2 if year == 2016 & code_wb == "TCD"
replace mfg_x = 0.8 if year == 2015 & code_wb == "TCD"
replace mfg_x = 0.2 if year == 2014 & code_wb == "TCD"
replace mfg_x = 0.3 if year == 2013 & code_wb == "TCD"

* Cuba - S: Official Statistics from Cuba - We take 2018 as 2020
* Notes: We take Nickel exports as ores, so not counted here.
* The 2018 row is blanked and then filled by linear interpolation below.

replace mfg_x = 28.01 if year == 2020 & code_wb == "CUB"
replace mfg_x = . if year == 2018 & code_wb == "CUB"
replace mfg_x = 21.5 if year == 2017 & code_wb == "CUB"
replace mfg_x = 26.6 if year == 2016 & code_wb == "CUB"
replace mfg_x = 23.7 if year == 2015 & code_wb == "CUB"
replace mfg_x = 18 if year == 2014 & code_wb == "CUB"
replace mfg_x = 17.5 if year == 2013 & code_wb == "CUB"

* Interpolation is computed for every country, but only Cuba's gaps are filled
sort code_wb year
by code_wb: ipolate mfg_x year, gen(intmfg)
replace mfg_x = intmfg if mfg_x == . & code_wb == "CUB"
drop intmfg

* Somalia - S: ITC Trade Map (Mirror Data) - We take 2019 as 2020

replace mfg_x = 4.2 if year == 2020 & code_wb == "SOM"
replace mfg_x = 4.6 if year == 2018 & code_wb == "SOM"
replace mfg_x = 1.5 if year == 2017 & code_wb == "SOM"
replace mfg_x = 0.6 if year == 2016 & code_wb == "SOM"
replace mfg_x = 1.0 if year == 2015 & code_wb == "SOM"
replace mfg_x = 1.4 if year == 2014 & code_wb == "SOM"
replace mfg_x = 1.7 if year == 2013 & code_wb == "SOM"

* Syria - S: ITC Trade Map (Mirror Data) - We take 2019 as 2020

replace mfg_x = 15.9 if year == 2020 & code_wb == "SYR"
replace mfg_x = 14 if year == 2018 & code_wb == "SYR"
replace mfg_x = 14.9 if year == 2017 & code_wb == "SYR"
replace mfg_x = 18.4 if year == 2016 & code_wb == "SYR"
replace mfg_x = 19.6 if year == 2015 & code_wb == "SYR"
replace mfg_x = 19.5 if year == 2014 & code_wb == "SYR"
replace mfg_x = 27.5 if year == 2013 & code_wb == "SYR"

* Sudan - S: ITC Trade Map (Mirror Data) - We take 2019 as 2020 and 2012 as 2013
* Notes: we combine South Sudan and Sudan to form a single country

replace mfg_x = 1.5 if year == 2020 & code_wb == "SDN"
replace mfg_x = 1.9 if year == 2018 & code_wb == "SDN"
replace mfg_x = 1.1 if year == 2017 & code_wb == "SDN"
replace mfg_x = 2.0 if year == 2016 & code_wb == "SDN"
replace mfg_x = 1.8 if year == 2015 & code_wb == "SDN"
replace mfg_x = 0.7 if year == 2014 & code_wb == "SDN"
replace mfg_x = 3.4 if year == 2013 & code_wb == "SDN"

* Liberia - S: Official statistics from Liberia - We take 2016 as 2020
* The 2016-2018 rows are blanked and then filled by linear interpolation below.

replace mfg_x = 37.2 if year == 2020 & code_wb == "LBR"
replace mfg_x = . if year == 2018 & code_wb == "LBR"
replace mfg_x = . if year == 2017 & code_wb == "LBR"
replace mfg_x = . if year == 2016 & code_wb == "LBR"
replace mfg_x = 32 if year == 2015 & code_wb == "LBR"
replace mfg_x = 13.1 if year == 2014 & code_wb == "LBR"
replace mfg_x = 32.1 if year == 2013 & code_wb == "LBR"

* Interpolation is computed for every country, but only Liberia's gaps are filled
sort code_wb year
by code_wb: ipolate mfg_x year, gen(intmfg)
replace mfg_x = intmfg if mfg_x == . & code_wb == "LBR"
drop intmfg

* Eritrea - Sources: ITC (mirror data) - Compatible with ores and metals dataset
replace mfg_x = 0.7 if year == 2020 & code_wb == "ERI"
replace mfg_x = 0.7 if year == 2018 & code_wb == "ERI"
replace mfg_x = 1.7 if year == 2017 & code_wb == "ERI"
replace mfg_x = 0.9 if year == 2016 & code_wb == "ERI"
replace mfg_x = 1.7 if year == 2015 & code_wb == "ERI"
replace mfg_x = 1.3 if year == 2014 & code_wb == "ERI"
replace mfg_x = 4.0 if year == 2013 & code_wb == "ERI"

*-------------------------------------------------------------------------------
* Corrections
* Overrides for countries whose existing mfg_x values are judged unreliable
* (reasons given per country). Where only the 2020 row is set by hand, the
* rejected years are blanked first and then refilled by linear interpolation.

* Botswana - S: ITC Trade Map - We take 2019 as 2020
* Notes: World Bank Data seems to incorporate mining exports

replace mfg_x = 5.7 if year == 2020 & code_wb == "BWA"
replace mfg_x = 6.5 if year == 2018 & code_wb == "BWA"
replace mfg_x = 6.1 if year == 2017 & code_wb == "BWA"
replace mfg_x = 5.4 if year == 2016 & code_wb == "BWA"
replace mfg_x = 6.9 if year == 2015 & code_wb == "BWA"
replace mfg_x = 4.8 if year == 2014 & code_wb == "BWA"
replace mfg_x = 5.4 if year == 2013 & code_wb == "BWA"

* Republic of Congo - S: ITC Trade Map - We take 2019 as 2020 - Excluding "ships" exports
* Notes: World Bank Data seems to add a substantial amount of ships exports

replace mfg_x = 2.9 if year == 2020 & code_wb == "COG"
replace mfg_x = 2 if year == 2018 & code_wb == "COG"
replace mfg_x = 8.1 if year == 2017 & code_wb == "COG"
replace mfg_x = 15.7 if year == 2016 & code_wb == "COG"
replace mfg_x = 5.8 if year == 2015 & code_wb == "COG"
replace mfg_x = 6.0 if year == 2014 & code_wb == "COG"
replace mfg_x = 1.9 if year == 2013 & code_wb == "COG"

* Guyana - We take 2016 as the last year of good data. After that there
* seems to be contamination from ship and container exports which may
* be an issue with the flag of the ships and not MFG production itself
* NOTE(review): the code blanks every year after 2015, i.e. the 2016 row too,
* and refills 2016-2018 by interpolation; presumably the value written to
* 2020 is the 2016 figure being moved - confirm.

replace mfg_x = . if year > 2015 & code_wb == "GUY"
replace mfg_x = 6.2515265 if year == 2020 & code_wb == "GUY"

* Interpolation is computed for every country, but only Guyana's gaps are filled
sort code_wb year
by code_wb: ipolate mfg_x year, gen(intmfg)
replace mfg_x = intmfg if mfg_x == . & code_wb == "GUY"
drop intmfg

* Hong Kong - S: WTO (https://data.wto.org/) - Estimates - We take 2019 as 2020

replace mfg_x = 91.8 if year == 2020 & code_wb == "HKG"
replace mfg_x = 89.8 if year == 2018 & code_wb == "HKG"
replace mfg_x = 86.8 if year == 2017 & code_wb == "HKG"
replace mfg_x = 86.1 if year == 2016 & code_wb == "HKG"
replace mfg_x = 87.8 if year == 2015 & code_wb == "HKG"
replace mfg_x = 86.7 if year == 2014 & code_wb == "HKG"
replace mfg_x = 82.2 if year == 2013 & code_wb == "HKG"

* Jamaica - S: ITC Trade Map - We take 2019 as 2020

replace mfg_x = 52 if year == 2020 & code_wb == "JAM"
replace mfg_x = 60.3 if year == 2018 & code_wb == "JAM"
replace mfg_x = 45.6 if year == 2017 & code_wb == "JAM"
replace mfg_x = 46.0 if year == 2016 & code_wb == "JAM"
replace mfg_x = 47.7 if year == 2015 & code_wb == "JAM"
replace mfg_x = 42.6 if year == 2014 & code_wb == "JAM"
replace mfg_x = 39.9 if year == 2013 & code_wb == "JAM"

* Lebanon - S: ITC Trade Map - We take 2019 as 2020
* Notes: We need to clean the data from the re-exports of gold and diamonds

replace mfg_x = 65.6 if year == 2020 & code_wb == "LBN"
replace mfg_x = 63.3 if year == 2018 & code_wb == "LBN"
replace mfg_x = 63.2 if year == 2017 & code_wb == "LBN"
replace mfg_x = 69.5 if year == 2016 & code_wb == "LBN"
replace mfg_x = 70.4 if year == 2015 & code_wb == "LBN"
replace mfg_x = 69.3 if year == 2014 & code_wb == "LBN"
replace mfg_x = 63.3 if year == 2013 & code_wb == "LBN"

* Macao - S: ITC Trade Map - We take 2019 as 2020
* Notes: WB Data seems wrong - Interpolation for 2017 and 2013

replace mfg_x = 94.8 if year == 2020 & code_wb == "MAC"
replace mfg_x = 89.1 if year == 2018 & code_wb == "MAC"
replace mfg_x = 92.9 if year == 2017 & code_wb == "MAC"
replace mfg_x = 97.6 if year == 2016 & code_wb == "MAC"
replace mfg_x = 97.1 if year == 2015 & code_wb == "MAC"
replace mfg_x = 90.7 if year == 2014 & code_wb == "MAC"
replace mfg_x = 89.2 if year == 2013 & code_wb == "MAC"

* Namibia - S: ITC Trade Map - We take 2019 as 2020
* Notes: WB Data incorporates some mineral exports

replace mfg_x = 12.2 if year == 2020 & code_wb == "NAM"
replace mfg_x = 18.6 if year == 2018 & code_wb == "NAM"
replace mfg_x = 11.4 if year == 2017 & code_wb == "NAM"
replace mfg_x = 14.5 if year == 2016 & code_wb == "NAM"
replace mfg_x = 10.9 if year == 2015 & code_wb == "NAM"
replace mfg_x = 27.3 if year == 2014 & code_wb == "NAM"
replace mfg_x = 28.5 if year == 2013 & code_wb == "NAM"

* Panama - S: Official statistics from Panama - We use 2017 as 2020 - Interpolation for 2018, 2017 and 2014.
* Notes: We eliminate the enormous jump in manufacture share in exports seen in the data from WB

replace mfg_x = 22.3 if year == 2020 & code_wb == "PAN"
replace mfg_x = 21.8 if year == 2018 & code_wb == "PAN"
replace mfg_x = 21.5 if year == 2017 & code_wb == "PAN"
replace mfg_x = 21.2 if year == 2016 & code_wb == "PAN"
replace mfg_x = 23.5 if year == 2015 & code_wb == "PAN"
replace mfg_x = 22.5 if year == 2014 & code_wb == "PAN"
replace mfg_x = 21.4 if year == 2013 & code_wb == "PAN"

* Togo - S: ITC Trade Map - We take 2019 as 2020
* Notes: We take gold and oil exports as re-exports.

replace mfg_x = 32.9 if year == 2020 & code_wb == "TGO"
replace mfg_x = 33.5 if year == 2018 & code_wb == "TGO"
replace mfg_x = 31.4 if year == 2017 & code_wb == "TGO"
replace mfg_x = 35.6 if year == 2016 & code_wb == "TGO"
replace mfg_x = 35.1 if year == 2015 & code_wb == "TGO"
replace mfg_x = 32.6 if year == 2014 & code_wb == "TGO"
replace mfg_x = 35.1 if year == 2013 & code_wb == "TGO"

* Tanzania
* Notes: We take 2017 as 2020, as 2018 seems to be an outlier
* Every year after 2016 is blanked, 2020 is set by hand and the 2017-2018
* rows are refilled by interpolation.

replace mfg_x = . if year >	 2016 & code_wb == "TZA"
replace mfg_x = 15.861602 if year == 2020 & code_wb == "TZA"

* Interpolation is computed for every country, but only Tanzania's gaps are filled
sort code_wb year
by code_wb: ipolate mfg_x year, gen(intmfg)
replace mfg_x = intmfg if mfg_x == . & code_wb == "TZA"
drop intmfg

* Central African Republic: - S: ITC Trade Map - We take 2019 as 2020

* Data from World Bank (and also ITC) provides very high values for CAR in some years
* of the 2010s. These coincide with embargoes on the exports of diamonds, which could be generating the issue.
* We take 2013 as the last year of good data from WB, replace 2020 with 2019 ITC data,
* and then take linear interpolation

replace mfg_x = . if year > 2013 & code_wb == "CAF"
replace mfg_x = 6.1133 if year == 2020 & code_wb == "CAF"

* Interpolation is computed for every country, but only CAR's gaps are filled
sort code_wb year
by code_wb: ipolate mfg_x year, gen(intmfg)
replace mfg_x = intmfg if mfg_x == . & code_wb == "CAF"
drop intmfg

********************************************************************************
**# Step 3: Finalize Data - Estimate moving averages
********************************************************************************

* Centred moving averages of mfg_x within each country, over consecutive
* rows (not calendar years: the year sequence has gaps, e.g. 2018 -> 2020).
*   mfg_x_ma1: window of 1 row on each side (3 terms)
*   mfg_x_ma2: window of 2 rows on each side (5 terms)
* Near the edges the window is truncated as described below. A result is
* missing whenever any term in its window is missing.
*
* Rows are addressed relative to the current row (_n) and the last row of
* the country block (_N), so the result does not depend on every country
* having exactly 58 rows, and no row is left at a placeholder value. With
* 58 rows per country this reproduces the fixed positions 1, 2, 57 and 58.

sort code_wb year

* ma1: interior rows use the previous, current and next row
by code_wb: gen mfg_x_ma1 = (mfg_x[_n-1] + mfg_x[_n] + mfg_x[_n+1])/3 if _n > 1 & _n < _N

* ma1: the first and last rows average themselves with their single neighbour
by code_wb: replace mfg_x_ma1 = (mfg_x[1] + mfg_x[2])/2 if _n == 1
by code_wb: replace mfg_x_ma1 = (mfg_x[_N] + mfg_x[_N-1])/2 if _n == _N

* Taking moving averages with a window of 2

* ma2: interior rows use two rows on each side
by code_wb: gen mfg_x_ma2 = ///
	(mfg_x[_n-2] + mfg_x[_n-1] + mfg_x[_n] + mfg_x[_n+1] + mfg_x[_n+2])/5 ///
	if _n > 2 & _n < _N - 1

* ma2: first two and last two rows (1960, 1963, 2018 and 2020 when the block
* has the expected 58 rows) use the 3 or 4 rows available at that edge
by code_wb: replace mfg_x_ma2 = (mfg_x[1] + mfg_x[2] + mfg_x[3])/3 if _n == 1
by code_wb: replace mfg_x_ma2 = (mfg_x[1] + mfg_x[2] + mfg_x[3] + mfg_x[4])/4 if _n == 2
by code_wb: replace mfg_x_ma2 = (mfg_x[_N-3] + mfg_x[_N-2] + mfg_x[_N-1] + mfg_x[_N])/4 if _n == _N - 1
by code_wb: replace mfg_x_ma2 = (mfg_x[_N-2] + mfg_x[_N-1] + mfg_x[_N])/3 if _n == _N

*-------------------------------------------------------------------------------
* MOMENTARY FIXES
*-------------------------------------------------------------------------------

* Hand-set values for Djibouti and Guinea-Bissau in the 2015 and 2020 rows.
* The same figure is written to the raw series and to both moving averages.
foreach series of varlist mfg_x mfg_x_ma1 mfg_x_ma2 {

	* Djibouti: assume no change (same value in both years)
	replace `series' = 63.200001 if code_wb == "DJI" & inlist(year, 2015, 2020)

	* Guinea-Bissau - Source: OEC (https://oec.world/en)
	replace `series' = 0.3 if code_wb == "GNB" & year == 2015
	replace `series' = 0.7 if code_wb == "GNB" & year == 2020
}

********************************************************************************
**# Step 3: Finalize Data - Combine with GJV2016 and save
********************************************************************************

* Keep only the five-yearly benchmark rows, 1960 through 2020
keep if inlist(year, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020)

* Build the combined series. By default each one takes the GJV2016 column
* (mfg_sh_x). Rows marked below instead take the series constructed in this
* do-file (mfg_x and its two moving averages):
*   - every year after 2014, and
*   - Brunei (BRN) in 2010 (correction).
tempvar from_wb
gen byte `from_wb' = year > 2014 | (year == 2010 & code_wb == "BRN")

foreach src of varlist mfg_x mfg_x_ma1 mfg_x_ma2 {
	* mfg_x -> mfg_sh_x_c, mfg_x_ma1 -> mfg_sh_x_c_ma1, mfg_x_ma2 -> mfg_sh_x_c_ma2
	local dst = subinstr("`src'", "mfg_x", "mfg_sh_x_c", 1)
	gen `dst' = cond(`from_wb', `src', mfg_sh_x)
}
drop `from_wb'

save "processed_datasets/dataset_mfg", replace
